{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# New to Jupyter notebook? Check out the Jupyter notebook \n",
    "# documentation (https://jupyter-notebook.readthedocs.io/en/stable/ui_components.html), \n",
    "# and try the User interface tour in the Help menu above."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# BlockSci Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import blocksci\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.ticker\n",
    "import collections\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "%matplotlib notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set parser_data_directory to the data directory that blocksci_parser wrote its output to.\n",
    "chain = blocksci.Blockchain(parser_data_directory)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Measuring different types of address use"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate net_address_type_value() on the blocks visited by chain.map_blocks (timed).\n",
    "%time net_coins_per_block = chain.map_blocks(lambda blk: blk.net_address_type_value())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Running total of the per-block values, divided by 1e8 (presumably satoshi -> BTC),\n",
    "# then passed through chain.heights_to_dates and given string column labels.\n",
    "df = (\n",
    "    pd.DataFrame(net_coins_per_block)\n",
    "    .fillna(0)\n",
    "    .cumsum()\n",
    "    .div(1e8)\n",
    "    .pipe(chain.heights_to_dates)\n",
    "    .rename(columns=str)\n",
    ")\n",
    "# Plot weekly means with the y-axis anchored at zero.\n",
    "ax = df.resample(\"W\").mean().plot()\n",
    "ax.set_ylim(bottom=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Visualizing Transaction Fees for an individual block"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fee rate of every transaction in one example block, plotted against its row number.\n",
    "example_block_height = 465100\n",
    "df = pd.DataFrame(\n",
    "    chain[example_block_height].txes.fee_per_byte(),\n",
    "    columns=[\"Satoshis per byte\"],\n",
    ")\n",
    "# reset_index() exposes the row number as an \"index\" column for the x-axis.\n",
    "ax = df.reset_index().plot(kind=\"scatter\", x=\"index\", y=\"Satoshis per byte\")\n",
    "ax.set_ylim(bottom=0)\n",
    "ax.set_xlim(left=0)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Identifying Overlay Applications"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%time txes = chain.blocks.txes.including_output_of_type(blocksci.address_type.nulldata).all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pair each selected transaction's block time with the result of blocksci.label_application.\n",
    "%time labels = [(nulldata_tx.block.time, blocksci.label_application(nulldata_tx)) for nulldata_tx in txes]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per labelled transaction: its block time and its application label.\n",
    "df = pd.DataFrame(labels, columns=[\"date\", \"label\"])\n",
    "# Count transactions per (date, label) and pivot the labels into columns.\n",
    "df = df.reset_index().groupby([\"date\", \"label\"]).count().unstack(level=-1).fillna(0)\n",
    "df.columns = df.columns.droplevel()\n",
    "# Restrict to rows after the cutoff once, instead of re-filtering for each use.\n",
    "recent = df[df.index > pd.to_datetime(\"1-1-2016\")]\n",
    "# Ten labels with the largest totals in that window, minus any containing \"Address\".\n",
    "important_columns = list(recent.sum().sort_values()[-10:].index)\n",
    "important_columns = [x for x in important_columns if \"Address\" not in x]\n",
    "# \"W\" is the documented weekly offset alias (lowercase \"w\" is deprecated in recent pandas)\n",
    "# and matches the weekly resample used earlier in this notebook.\n",
    "ax = recent.cumsum().resample(\"W\").mean()[important_columns].plot()\n",
    "ax.set_ylim(0)\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Average Fee per Transaction in 2017 in USD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "converter = blocksci.CurrencyConverter()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# BlockSci vectorized interface: pull per-block attributes for the '2017' range at once.\n",
    "blocks = chain.range('2017')\n",
    "times = blocks.time\n",
    "# Total block fee divided by the number of transactions in that block.\n",
    "fees = blocks.fee / blocks.tx_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-block fee divided by transaction count, indexed by block time.\n",
    "df = pd.DataFrame({\"Fee\":fees}, index=times)\n",
    "# Convert the satoshi amounts with the BlockSci currency converter.\n",
    "df = converter.satoshi_to_currency_df(df, chain)\n",
    "# \"D\" is the documented daily offset alias; lowercase \"d\" is deprecated in recent pandas.\n",
    "ax = df.resample(\"D\").mean().plot(legend=False)\n",
    "ax.set_ylim(bottom=0)\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Transactions Paying over 1000 dollars in fees"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): 10000000 looks like a satoshi pre-filter (0.1 BTC if 1e8 satoshi = 1 BTC).\n",
    "# Transactions below it are never checked against the 1000-currency-unit cutoff applied\n",
    "# later, which matters whenever the exchange rate exceeds 10,000 per BTC -- confirm.\n",
    "%time high_fee_txes = chain.cpp.filter_tx(\"fee(tx) > 10000000\", 0, len(chain))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "converter = blocksci.CurrencyConverter()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert each fee at its own block's timestamp while building the rows; this avoids a\n",
    "# row-wise DataFrame.apply that looked every block up again by height.\n",
    "df = pd.DataFrame(\n",
    "    [\n",
    "        (tx.block.height, converter.satoshi_to_currency(tx.fee, tx.block.time))\n",
    "        for tx in high_fee_txes\n",
    "    ],\n",
    "    columns=[\"height\", \"fee\"],\n",
    ")\n",
    "df.index = df[\"height\"]\n",
    "# Keep only transactions whose converted fee exceeds 1000 currency units.\n",
    "df = df[df[\"fee\"] > 1000]\n",
    "df = chain.heights_to_dates(df)\n",
    "fig, ax = plt.subplots()\n",
    "# Axes.plot_date is deprecated in recent Matplotlib; it is equivalent to marking the\n",
    "# x-axis as dates and then calling plot() with the same format string.\n",
    "ax.xaxis_date()\n",
    "ax.plot(df.index, df[\"fee\"], \"x\")\n",
    "ax.set_yscale(\"log\")\n",
    "# Show plain decimal tick labels on the log axis (no offset, no scientific notation).\n",
    "formatter = matplotlib.ticker.ScalarFormatter(useOffset=False)\n",
    "formatter.set_scientific(False)\n",
    "ax.yaxis.set_major_formatter(formatter)\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Balance held by cluster"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import blocksci.cluster_python\n",
    "# cluster_data_directory should be set to the directory containing the\n",
    "# output of the `clusterer` program\n",
    "cm = blocksci.cluster_python.ClusterManager(cluster_data_directory, chain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sum the balance at block height 494653 over every address in the cluster that contains\n",
    "# the address below; the division by 1e8 presumably converts satoshi to BTC.\n",
    "address = chain.address_from_string(\"3EhLZarJUNSfV6TWMZY1Nh5mi3FMsdHa5U\")\n",
    "cluster = cm.cluster_with_address(address)\n",
    "sum(member.balance(494653) for member in cluster.addresses) / 1e8"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
